---
title: Preprocessing
description: Preprocessing the data for future use
---

In [None]:
from pathlib import Path

import pandas as pd
import polars as pl
from IPython.display import HTML, display

### Preprocessing match results

In [None]:
# | label: match_results
# Expected columns: 'league', 'country', 'season', 'date', 'home', 'home_goals', 'away', 'away_goals'

# Country codes found in the raw file, mapped to the full names used afterwards.
country_names = {
    "ENG": "England",
    "ITA": "Italy",
    "FRA": "France",
    "GER": "Germany",
    "ESP": "Spain",
}

# "NA" marks a missing value in the raw file; dates are parsed while reading.
raw_match_results = pl.read_csv(
    Path("./../data/extracted_match_results.csv"),
    infer_schema=True,
    infer_schema_length=10000,
    try_parse_dates=True,
    null_values=["NA"],
)

match_results = (
    raw_match_results
    # Force integer goal counts regardless of what schema inference picked.
    .cast({"home_goals": pl.Int64, "away_goals": pl.Int64})
    .with_columns(pl.col("country").replace(country_names))
    # A match without a score cannot yield a result, so it is discarded.
    .drop_nulls(subset=["home_goals", "away_goals"])
)

match_results.head()

league,country,season,date,home,home_goals,away,away_goals
str,str,i64,date,str,i64,str,i64
"""Premier League""","""England""",2015,2014-08-16,"""Manchester Utd""",1,"""Swansea City""",2
"""Premier League""","""England""",2015,2014-08-16,"""Stoke City""",0,"""Aston Villa""",1
"""Premier League""","""England""",2015,2014-08-16,"""Leicester City""",2,"""Everton""",2
"""Premier League""","""England""",2015,2014-08-16,"""QPR""",0,"""Hull City""",1
"""Premier League""","""England""",2015,2014-08-16,"""West Ham""",0,"""Tottenham""",1


In [None]:
# Summary statistics per column (count, null_count, mean, quantiles, min/max) to sanity-check the load.
match_results.describe()

statistic,league,country,season,date,home,home_goals,away,away_goals
str,str,str,f64,str,str,f64,str,f64
"""count""","""19163""","""19163""",19163.0,"""19163""","""19163""",19163.0,"""19163""",19163.0
"""null_count""","""0""","""0""",0.0,"""0""","""0""",0.0,"""0""",0.0
"""mean""",,,2019.780619,"""2019-10-18 03:50:28.147000""",,1.542452,,1.227626
"""std""",,,3.0606,,,1.309887,,1.170273
"""min""","""Bundesliga""","""England""",2015.0,"""2014-08-08""","""Ajaccio""",0.0,"""Ajaccio""",0.0
"""25%""",,,2017.0,"""2017-02-17""",,1.0,,0.0
"""50%""",,,2020.0,"""2019-10-25""",,1.0,,1.0
"""75%""",,,2022.0,"""2022-05-05""",,2.0,,2.0
"""max""","""Serie A""","""Spain""",2025.0,"""2025-02-03""","""Wolves""",10.0,"""Wolves""",9.0


In [None]:
def determine_result(goals: pl.Expr, opponent_goals: pl.Expr) -> pl.Expr:
    """Build an expression labelling a match from one team's point of view.

    Args:
        goals: Goals scored by the team of interest.
        opponent_goals: Goals scored by the opposing team.

    Returns:
        An expression evaluating to "win" when `goals` exceeds
        `opponent_goals`, "loss" when it is lower, and "draw" in every
        other case.
    """
    scored_more = goals > opponent_goals
    scored_less = goals < opponent_goals
    outcome = pl.when(scored_more).then(pl.lit("win"))
    outcome = outcome.when(scored_less).then(pl.lit("loss"))
    return outcome.otherwise(pl.lit("draw"))


# Label every match from both the home and the away perspective.
match_results = match_results.with_columns(
    home_result=determine_result(pl.col("home_goals"), pl.col("away_goals")),
    away_result=determine_result(pl.col("away_goals"), pl.col("home_goals")),
)

# Reshape to one row per (match, team): the home rows come first, then the away rows.
match_results = pl.concat(
    [
        match_results.select(
            "league",
            "country",
            "date",
            team=pl.col(side),
            goals=pl.col(f"{side}_goals"),
            result=pl.col(f"{side}_result"),
            is_home=pl.lit(side == "home"),
        )
        for side in ("home", "away")
    ],
    how="vertical",
)

match_results

league,country,date,team,goals,result,is_home
str,str,date,str,i64,str,bool
"""Premier League""","""England""",2014-08-16,"""Manchester Utd""",1,"""loss""",true
"""Premier League""","""England""",2014-08-16,"""Stoke City""",0,"""loss""",true
"""Premier League""","""England""",2014-08-16,"""Leicester City""",2,"""draw""",true
"""Premier League""","""England""",2014-08-16,"""QPR""",0,"""loss""",true
"""Premier League""","""England""",2014-08-16,"""West Ham""",0,"""loss""",true
…,…,…,…,…,…,…
"""Ligue 1""","""France""",2025-02-02,"""Nice""",1,"""draw""",false
"""Ligue 1""","""France""",2025-02-02,"""Le Havre""",1,"""draw""",false
"""Ligue 1""","""France""",2025-02-02,"""Strasbourg""",0,"""loss""",false
"""Ligue 1""","""France""",2025-02-02,"""Nantes""",2,"""win""",false


### Preprocessing head coach

In [None]:
# | label: head_coach
# Expected columns: 'Team', 'League', 'Country', 'HeadCoach', 'Appointed', 'EndDate', 'Tenure', 'Matches', 'Wins', 'Draws', 'Losses'
head_coach_csv = Path("./../data/extracted_head_coach.csv")

# Dates are parsed while reading; the schema is inferred from the first 10000 rows.
head_coach = pl.read_csv(
    head_coach_csv,
    try_parse_dates=True,
    infer_schema=True,
    infer_schema_length=10000,
)
head_coach.head()

Unnamed: 0,Team,League,Country,HeadCoach,Appointed,EndDate,Tenure,Matches,Wins,Draws,Losses
0,Chelsea FC,Premier League,England,Mauricio Pochettino,2023-07-01,NaT,296,44,22,9,13
1,Chelsea FC,Premier League,England,Graham Potter,2022-09-08,2023-04-02,206,31,12,8,11
2,Chelsea FC,Premier League,England,Thomas Tuchel,2021-01-26,2022-09-07,589,100,63,19,18
3,Chelsea FC,Premier League,England,Frank Lampard,2019-07-04,2021-01-25,571,84,44,15,25
4,Chelsea FC,Premier League,England,Maurizio Sarri,2018-07-14,2019-06-30,351,63,40,11,12


In [None]:
# Summary statistics of the raw head coach records, before any filtering.
head_coach.describe()

Unnamed: 0,Appointed,EndDate,Tenure,Matches,Wins,Draws,Losses
count,3532,3468,3532.0,3532.0,3532.0,3532.0,3532.0
mean,1982-05-15 03:53:36.761041920,1983-04-16 22:00:00,608.212344,51.585504,22.526331,13.006229,16.052945
min,1886-06-26 00:00:00,1893-08-01 00:00:00,-242.0,0.0,0.0,0.0,0.0
25%,1961-11-02 06:00:00,1963-06-30 00:00:00,186.0,10.0,2.0,2.0,4.0
50%,1987-07-01 00:00:00,1988-03-06 00:00:00,364.0,29.0,10.0,7.0,10.0
75%,2004-12-29 00:00:00,2005-06-30 00:00:00,730.0,67.0,28.0,17.0,21.0
max,2024-04-23 00:00:00,2024-06-30 00:00:00,14613.0,1490.0,895.0,323.0,272.0
std,,,815.283982,75.761523,39.422597,18.707847,20.552184


We need to filter out head coaches who were not active between 2015 and 2023.

In [None]:
# Date range covered by the match results. The reshaped frame uses lowercase
# column names, so the column is "date" (not "Date").
match_dates = match_results.get_column("date")
latest_match = match_dates.max()
earliest_match = match_dates.min()

# Remove head coaches appointed after the last recorded match.
head_coach = head_coach.filter(pl.col("Appointed") <= latest_match)
# Keep head coaches dismissed on or after the first recorded match, or still
# in post (no end date).
head_coach = head_coach.filter(
    (pl.col("EndDate") >= earliest_match) | pl.col("EndDate").is_null()
)

:::{caution}
Head coach appointment records, extracted from TransferMarkt, contain data about each head coach's spell at a specific club: tenure, number of matches played, number of matches won, etc. Those data points go beyond our cut-off date at the end of the 2023 season (2024-01-14).

One important point is that the features extending beyond the cut-off date still relate to a head coach appointment we have in our records. This guarantees that metrics such as *number of clubs a head coach has managed* are properly reflected and still relate to head coach performance.

However, this creates an asymmetry in our data, as some data points are limited to a time frame and others are not.
Moreover, we must be careful in how we compare these data points with other datasets such as match results, as this could easily bias our statistical study.

The only way we combine this dataset with match results is by extracting the head coach's tenure on the day of a match. This does not bias our statistical study, as this feature is properly bounded by our cut-off date.
:::

:::{note}
I considered computing head coach performance metrics from match results, but we would lose information on prior records and create imbalanced data for plots such as a linear regression of head coach performance over head coach tenure: a long-standing coach who was dismissed soon after our start date would have a low number of matches, and thus a performance metric with higher variance that would bias the linear regression because of the long tenure.
:::

In [None]:
# Ensure there is only one head coach at a time in any given team.
# Written with the polars API: head_coach is a polars DataFrame, which has no
# .copy(), .sort_values(), .groupby(...)[...] or column item assignment.
# Assumes 'Appointed' and 'EndDate' were parsed as dates on load (TODO confirm).
head_coach_temp = (
    # Order each team's appointments chronologically.
    head_coach.sort(["Team", "Appointed"])
    # Coaches still in post have no end date: use the cut-off date instead.
    .with_columns(pl.col("EndDate").fill_null(pl.date(2024, 1, 14)))
    # Gap between this appointment's end and the team's next appointment;
    # a negative gap means the two appointments overlap. The last appointment
    # of each team has no successor, so its gap is null.
    .with_columns(
        OverlapDuration=pl.col("Appointed").shift(-1).over("Team") - pl.col("EndDate")
    )
    .with_columns(Overlap=pl.col("OverlapDuration").dt.total_days() < 0)
)
# Appointments overlapping with the next one (null gaps are not selected).
overlapping = head_coach_temp.filter(pl.col("Overlap"))

Nous avons trouvé {eval}`overlapping.shape[0]` enregistrements de mandats d'entraîneurs sportifs qui avaient lieu alors qu'un autre chef-entraîneur assurait l'entraînement de l'équipe. Ces enregistrements concernent les équipes suivantes : {eval}`', '.join(overlapping['Team'].unique())`. Ces enregistrements sont exclus du jeu de données.

In [None]:
# Largest overlaps first (most negative duration). polars sorts nulls first by
# default, so push appointments without a successor (null duration) to the end.
head_coach_temp.sort("OverlapDuration", nulls_last=True).head()

Unnamed: 0,Team,League,Country,HeadCoach,Appointed,EndDate,Tenure,Matches,Wins,Draws,Losses,OverlapDuration,Overlap
3422,Stade Reims,Ligue 1,France,David Guion,2017-05-22,2021-05-25,1464,157,67,46,44,-1059 days,True
3259,Stade Rennais FC,Ligue 1,France,Rolland Courbis,2016-01-20,2016-09-11,235,17,6,3,8,-72 days,True
3343,LOSC Lille,Ligue 1,France,René Girard,2013-07-01,2015-06-30,729,95,42,24,29,-35 days,True
2003,Torino FC,Serie A,Italy,Gian Piero Ventura,2011-06-06,2016-06-25,1846,217,85,64,68,-31 days,True
192,West Ham United,Premier League,England,Sam Allardyce,2011-07-01,2015-06-30,1460,181,69,44,68,-21 days,True


In [None]:
# List the appointments of every team that has at least one overlapping
# appointment, in chronological order per team.
teams_with_overlap = overlapping["Team"]
(
    head_coach_temp.filter(pl.col("Team").is_in(teams_with_overlap))
    .sort(["Team", "Appointed"])
    .select("Team", "Appointed", "EndDate", "Overlap", "OverlapDuration")
    .head(10)
)

Unnamed: 0,Team,Appointed,EndDate,Overlap,OverlapDuration
2248,FC Empoli,2012-06-25,2015-06-04,False,11 days
2247,FC Empoli,2015-06-15,2016-06-30,False,1 days
2246,FC Empoli,2016-07-01,2017-06-30,False,1 days
2245,FC Empoli,2017-07-01,2017-12-17,False,0 days
2244,FC Empoli,2017-12-17,2018-11-05,False,1 days
2243,FC Empoli,2018-11-06,2019-03-13,False,0 days
2242,FC Empoli,2019-03-13,2019-06-30,True,-12 days
2241,FC Empoli,2019-06-18,2019-11-12,False,2 days
2240,FC Empoli,2019-11-14,2020-01-26,False,0 days
2239,FC Empoli,2020-01-26,2020-08-11,False,8 days


In [None]:
# | label: hc_inconsistency

# Stade Reims appointments in chronological order; show four rows starting at
# offset 2 to illustrate an inconsistent (overlapping) record.
reims_appointments = head_coach_temp.filter(pl.col("Team") == "Stade Reims").sort(
    "Appointed"
)
reims_appointments.select("Team", "Appointed", "EndDate", "Overlap").slice(2, 4)

Unnamed: 0,Team,Appointed,EndDate,Overlap
3423,Stade Reims,2016-07-01,2017-05-22,False
3422,Stade Reims,2017-05-22,2021-05-25,True
3421,Stade Reims,2018-07-01,2019-03-30,False
3420,Stade Reims,2021-06-23,2022-10-13,False


In [None]:
# Appointments overlapping the team's next appointment by 20 days or more are
# treated as inconsistent records.
long_overlaps = head_coach_temp.filter(
    pl.col("OverlapDuration").dt.total_days() <= -20
)
# Show the records that are about to be dropped.
display(long_overlaps)

# Drop them from head_coach. polars frames have no index, so rows are matched
# on the columns identifying an appointment.
# NOTE(review): assumes (Team, HeadCoach, Appointed) is unique per record.
# Appointments with no successor (null duration) are kept.
appointment_key = ["Team", "HeadCoach", "Appointed"]
head_coach = head_coach.join(
    long_overlaps.select(appointment_key), on=appointment_key, how="anti"
)

Next, we would like to add to each head coach record its appointment number, i.e. how many appointments the head coach has held so far.

In [None]:
# Using the chronological order of appointments, add a column telling how many
# appointments the head coach has held so far (1 = first appointment on record).
head_coach = head_coach.sort(["HeadCoach", "Appointed"]).with_columns(
    # Running 1-based position of the row within each coach's appointments.
    AppointmentNumber=pl.int_range(1, pl.len() + 1).over("HeadCoach")
)

# Preview the coaches who have held more than three appointments.
frequent_coaches = (
    head_coach.filter(pl.col("AppointmentNumber") > 3)
    .get_column("HeadCoach")
    .unique()
    .to_list()
)
head_coach.filter(pl.col("HeadCoach").is_in(frequent_coaches)).sort(
    ["HeadCoach", "AppointmentNumber"]
).head()

Unnamed: 0,Team,League,Country,HeadCoach,Appointed,EndDate,Tenure,Matches,Wins,Draws,Losses,AppointmentNumber
2244,FC Empoli,Serie A,Italy,Aurelio Andreazzoli,2017-12-17,2018-11-05,323,35,17,10,8,1
2242,FC Empoli,Serie A,Italy,Aurelio Andreazzoli,2019-03-13,2019-06-30,109,11,5,1,5,2
1773,Genoa CFC,Serie A,Italy,Aurelio Andreazzoli,2019-07-01,2019-10-22,113,9,2,2,5,3
2237,FC Empoli,Serie A,Italy,Aurelio Andreazzoli,2021-06-21,2022-06-01,345,41,12,11,18,4
417,Real Madrid,LaLiga,Spain,Carlo Ancelotti,2013-07-01,2015-05-25,693,119,89,14,16,1


In [None]:
# Consistency check: count the records where Matches != Wins + Draws + Losses
# (expected: 0).
head_coach.filter(
    pl.col("Matches") != pl.col("Wins") + pl.col("Draws") + pl.col("Losses")
).height

0

In [None]:
# Display the head coach records with the lowest number of matches.
display(head_coach.sort("Matches").head(20))
# Remove head coach records with 5 matches or fewer (only records with more
# than 5 matches are kept). Such a short stay indicates either a temporary
# replacement or a very bad performance, so we consider it not relevant.
head_coach = head_coach.filter(pl.col("Matches") > 5)

Unnamed: 0,Team,League,Country,HeadCoach,Appointed,EndDate,Tenure,Matches,Wins,Draws,Losses,AppointmentNumber
1696,SS Lazio,Serie A,Italy,Marcelo Bielsa,2016-07-06,2016-07-08,2,0,0,0,0,2
3421,Stade Reims,Ligue 1,France,Sébastien Desmazeau,2018-07-01,2019-03-30,272,0,0,0,0,1
2698,TSG 1899 Hoffenheim,Bundesliga,Germany,Marcel Rapp,2020-06-25,2020-06-30,5,1,1,0,0,1
2332,Hellas Verona,Serie A,Italy,Eusebio Di Francesco,2021-06-07,2021-09-14,99,4,1,0,3,3
1921,Udinese Calcio,Serie A,Italy,Igor Tudor,2018-04-24,2018-06-30,67,4,2,1,1,1
2761,VfB Stuttgart,Bundesliga,Germany,Nico Willig,2019-05-06,2019-06-30,55,4,1,3,0,1
2303,Chievo Verona,Serie A,Italy,Gian Piero Ventura,2018-10-10,2018-11-13,34,4,0,1,3,1
934,Granada CF,LaLiga,Spain,Miguel Ángel Portugal,2018-05-01,2018-06-30,60,5,2,0,3,1
2765,VfB Stuttgart,Bundesliga,Germany,Jos Luhukay,2016-07-01,2016-09-15,76,5,3,0,2,1
3390,Montpellier HSC,Ligue 1,France,Pascal Baills,2015-12-27,2016-01-26,30,5,1,0,4,1


#### Joining head coach with match results

In [None]:
# Compare the team names used by the two datasets.
# head_coach keeps the capitalised column 'Team'; the reshaped match_results
# frame uses the lowercase column 'team'.
coach_teams = set(head_coach["Team"])
match_teams = set(match_results["team"])

# Names appearing in only one of the two datasets.
coach_team_not_in_match = coach_teams - match_teams
match_team_not_in_coach = match_teams - coach_teams

len(coach_team_not_in_match), len(match_team_not_in_coach)

(59, 153)

Les résultats de matchs contiennent {eval}`len(match_teams)` équipes et les mandats de coachs contiennent {eval}`len(coach_teams)` équipes. Cependant, le nom de certaines équipes est différent entre les deux jeux de données. Par exemple, « Liverpool » dans les résultats de match devient « Liverpool FC » dans les mandats des entraîneurs.

Il existe {eval}`len(coach_team_not_in_match)` équipes présentes dans les mandats de coachs qui n'ont pas de correspondance dans les résultats de match.

In [None]:
# Group the distinct team names by country.
# Both frames are polars DataFrames; they are converted to pandas here so the
# result is a Series indexed by country (supports .index, [country] and .get).
coach_teams_by_country = head_coach.to_pandas().groupby("Country")["Team"].unique()
# The reshaped match_results frame uses lowercase column names.
match_teams_by_country = match_results.to_pandas().groupby("country")["team"].unique()

coach_teams_by_country, match_teams_by_country

(Country
 England    [Newcastle United, Crystal Palace, Chelsea FC,...
 France     [OGC Nice, FC Toulouse, Olympique Marseille, F...
 Germany    [1.FC Köln, 1.FSV Mainz 05, Eintracht Frankfur...
 Italy      [Frosinone Calcio, FC Empoli, Genoa CFC, Hella...
 Spain      [Granada CF, Valencia CF, Celta de Vigo, Real ...
 Name: Team, dtype: object,
 Country
 England    [Manchester Utd, Stoke City, Leicester City, Q...
 France     [Reims, Nice, Guingamp, Nantes, Evian, Lille, ...
 Germany    [Bayern Munich, Hoffenheim, Hannover 96, Köln,...
 Italy      [Chievo, Roma, Atalanta, Milan, Genoa, Palermo...
 Spain      [Málaga, Granada, Sevilla, Almería, Eibar, Cel...
 Name: Team, dtype: object)

In [None]:
from thefuzz import process

# Head coach team name -> best matching match-results team name (None if no
# candidate reaches the similarity threshold).
team_name_mapping = {}

# Match names country by country, so a club is only compared with clubs from
# the same country. Loop variables use their own names so that the
# `coach_teams` / `match_teams` sets computed earlier are not overwritten.
for country in coach_teams_by_country.index:
    country_coach_teams = coach_teams_by_country[country]
    country_match_teams = match_teams_by_country.get(country, [])

    for coach_team in country_coach_teams:
        # Best candidate as a list holding at most one (name, score) pair.
        best_matches = process.extract(coach_team, country_match_teams, limit=1)

        # Accept the candidate only when its similarity score is at least 60.
        if len(best_matches) != 0 and best_matches[0][1] >= 60:
            team_name_mapping[coach_team] = best_matches[0][0]
        else:
            team_name_mapping[coach_team] = None
            print(
                f"No match found for {coach_team} among {country_match_teams} in {country}"
            )

In [None]:
# | label: team_match_table

# Two-column table pairing each head-coach team name with its matched name.
coach_side_names = list(team_name_mapping.keys())
match_side_names = list(team_name_mapping.values())
name_match = pd.DataFrame(
    {
        "Team in head coach records": coach_side_names,
        "Team in match results": match_side_names,
    }
)
# Render the first rows as HTML without the index column.
display(HTML(name_match.head().to_html(index=False)))

Team in head coach records,Team in match results
Newcastle United,Newcastle Utd
Crystal Palace,Crystal Palace
Chelsea FC,Chelsea
Arsenal FC,Arsenal
Liverpool FC,Liverpool


In [None]:
# Rename head_coach['Team'] to the naming used in match results.
# polars has no Series.map(dict) / column item assignment, so the mapping is
# applied with replace_strict; teams without a match become null.
# Note: this cell is not idempotent. Re-running it maps already-renamed teams.
head_coach = head_coach.with_columns(
    pl.col("Team").replace_strict(
        team_name_mapping, default=None, return_dtype=pl.String
    )
)
head_coach.head()

Unnamed: 0,Team,League,Country,HeadCoach,Appointed,EndDate,Tenure,Matches,Wins,Draws,Losses,AppointmentNumber
942,Granada,LaLiga,Spain,Abel Resino,2015-01-19,2015-05-01,102,15,2,5,8,1
2935,Köln,Bundesliga,Germany,Achim Beierlorzer,2019-07-01,2019-11-09,131,13,3,1,9,1
2716,Mainz 05,Bundesliga,Germany,Achim Beierlorzer,2019-11-18,2020-09-28,315,26,9,4,13,2
2819,Eint Frankfurt,Bundesliga,Germany,Adi Hütter,2018-07-01,2021-06-30,1095,141,67,31,43,1
2662,M'Gladbach,Bundesliga,Germany,Adi Hütter,2021-07-01,2022-06-30,364,37,14,9,14,2


Nous pouvons maintenant ajouter le nombre de jours au poste d’entraîneur aux résultats de match.

In [None]:
# Check matches with missing goals (the reshaped frame uses the lowercase
# column 'goals', and polars represents missing values as null).
display(match_results.filter(pl.col("goals").is_null()))
# Remove matches with missing goals.
match_results = match_results.filter(pl.col("goals").is_not_null())

Unnamed: 0,League,Country,Date,Team,Goals,Result,isHome
8955,Ligue 1,France,2020-03-18,Strasbourg,,draw,True
8956,Ligue 1,France,2020-03-13,Lyon,,draw,True
8957,Ligue 1,France,2020-03-14,Montpellier,,draw,True
8958,Ligue 1,France,2020-03-14,Nantes,,draw,True
8959,Ligue 1,France,2020-03-14,Strasbourg,,draw,True
...,...,...,...,...,...,...,...
25524,Ligue 1,France,2020-05-23,Nice,,draw,False
25525,Ligue 1,France,2020-05-23,Brest,,draw,False
25526,Ligue 1,France,2020-05-23,Paris S-G,,draw,False
25527,Ligue 1,France,2020-05-23,Lille,,draw,False


In [None]:
# | label: final_match_results

# Attach every appointment of a team to each of that team's matches.
# match_results uses lowercase column names ('team', 'date'); head_coach keeps
# its capitalised ones ('Team', 'Appointed', ...).
coach_tenures = head_coach.select("Team", "HeadCoach", "Appointed", "EndDate")
match = match_results.join(coach_tenures, left_on="team", right_on="Team", how="left")

# Put aside matches of teams that have no head coach record at all.
no_headcoach = match.filter(pl.col("HeadCoach").is_null())

# Keep only the appointment in force on match day: appointed on or before the
# match, and either still in post (no end date) or ended on/after the match.
in_post_on_match_day = (pl.col("date") >= pl.col("Appointed")) & (
    (pl.col("date") <= pl.col("EndDate")) | pl.col("EndDate").is_null()
)
match = match.filter(pl.col("HeadCoach").is_not_null() & in_post_on_match_day)

# Add back the matches of teams without a head coach record.
match = pl.concat([match, no_headcoach], how="vertical")

# Days the head coach had been in post on match day (null without a coach).
match = match.with_columns(
    DaysInPost=(pl.col("date") - pl.col("Appointed")).dt.total_days()
).drop("Appointed", "EndDate")
match.head()

Unnamed: 0,League,Country,Date,Team,Goals,Result,isHome,HeadCoach,DaysInPost
0,Premier League,England,2014-08-16,Manchester Utd,1.0,loss,True,Louis van Gaal,33.0
1,Premier League,England,2014-08-16,Arsenal,2.0,win,True,Arsène Wenger,6528.0
2,Premier League,England,2014-08-17,Liverpool,2.0,win,True,Brendan Rodgers,777.0
3,Premier League,England,2014-08-17,Newcastle Utd,0.0,loss,True,Alan Pardew,1347.0
4,Premier League,England,2014-11-01,Newcastle Utd,1.0,win,True,Alan Pardew,1423.0


## Missing head coach data 

Il existe des matchs pour lesquels nous n'avons pas d'information sur le coach en poste.

In [None]:
# Number of matches with a known head coach, per team
# (count() ignores nulls, so teams without any coach record get 0).
team_hc_count = match.group_by("team").agg(pl.col("HeadCoach").count())
# Alphabetical list of the teams with no head coach record.
team_no_hc_record = (
    team_hc_count.filter(pl.col("HeadCoach") == 0)
    .get_column("team")
    .sort()
    .to_list()
)
print(len(team_no_hc_record))
print(team_no_hc_record)

95
['Ajaccio', 'Alavés', 'Almería', 'Amiens', 'Angers', 'Arminia', 'Atlético Madrid', 'Auxerre', 'Benevento', 'Bochum', 'Bordeaux', 'Braunschweig', 'Brentford', 'Brescia', 'Brest', 'Brighton', 'Burnley', 'Caen', 'Cagliari', 'Cardiff City', 'Carpi', 'Cesena', 'Clermont Foot', 'Cremonese', 'Crotone', 'Cádiz', 'Córdoba', 'Dijon', 'Düsseldorf', 'Eibar', 'Elche', 'Espanyol', 'Evian', 'Freiburg', 'Fulham', 'Gazélec Ajaccio', 'Girona', 'Greuther Fürth', 'Guingamp', 'Hamburger SV', 'Hannover 96', 'Heidenheim', 'Hertha BSC', 'Holstein Kiel', 'Huddersfield', 'Huesca', 'Hull City', 'Ingolstadt 04', 'Inter', 'Karlsruher', 'La Coruña', 'Lecce', 'Leeds United', 'Leganés', 'Leicester City', 'Lens', 'Levante', 'Mallorca', 'Metz', 'Middlesbrough', 'Monza', 'Málaga', 'Nancy', 'Norwich City', "Nott'ham Forest", 'Nîmes', 'Nürnberg', 'Osasuna', 'Paderborn 07', 'Palermo', 'Parma', 'Pescara', 'QPR', 'RB Leipzig', 'Rennes', 'SPAL', 'Saint-Étienne', 'Salernitana', 'Sampdoria', 'Schalke 04', 'Sheffield Utd', 'S

### Saving preprocessed data

In [None]:
# Save the preprocessed match results and head coach records.
# polars DataFrames are written with write_csv (there is no to_csv / index).
match.write_csv(Path("./../data/match_results.csv"))
head_coach.write_csv(Path("./../data/head_coach.csv"))